/* ffAliHelp - Helper routines for things that produce (rather than just
 * consume) ffAli type alignments. */
/* Copyright 2000-2003 Jim Kent.  All rights reserved. */

#include "common.h"
#include "fuzzyFind.h"
#include "dnaseq.h"

void ffCat(struct ffAli **pA, struct ffAli **pB)
/* Concatenate B to the end of A. Eat up second list
 * in process. */
{
  struct ffAli *a = *pA;
  struct ffAli *b = *pB;

  /* If list to add is empty our job is real easy. */
  if (b == NULL)
    return;

  /* If list to add into is empty, then just switch in the
   * second list. */
  if (a == NULL) {
    *pA = *pB;
    *pB = NULL;
    return;
  }

  /* Neither list empty.  Find rightmost element of first list
   * and cross-link it with leftmost element of second list. */
  while (a->right != NULL)
    a = a->right;
  b->left = a;
  a->right = b;
  *pB = NULL;
}

void ffAliSort(struct ffAli **pList,
               int (*compare)(const void *elem1, const void *elem2))
/* Sort a doubly linked list of ffAlis. */
{
  /* Get head of list and handle easy special empty case. */
  struct ffAli *r = *pList;
  if (r == NULL)
    return;

  /* Since first pointer is "left", in order to reuse slSort, have
   * to jump through some minor hoops. First go to right end of list,
   * then sort it. */
  while (r->right)
    r = r->right;
  slSort(&r, compare);

  /* We're sorted, but our right links are all broken.  Fix this. */
  slReverse(&r);
  r = ffMakeRightLinks(r);
  *pList = r;
}

int ffCmpHitsHayFirst(const void *va, const void *vb)
/* Compare function to sort hit array by ascending
 * target offset followed by ascending query offset. */
{
  const struct ffAli *a = *((struct ffAli **)va);
  const struct ffAli *b = *((struct ffAli **)vb);
  int diff;
  if ((diff = a->hStart - b->hStart) != 0)
    return diff;
  return a->nStart - b->nStart;
}

int ffCmpHitsNeedleFirst(const void *va, const void *vb)
/* Compare function to sort hit array by ascending
 * query offset followed by ascending target offset. */
{
  const struct ffAli *a = *((struct ffAli **)va);
  const struct ffAli *b = *((struct ffAli **)vb);
  int diff;
  if ((diff = a->nStart - b->nStart) != 0)
    return diff;
  return a->hStart - b->hStart;
}

void ffExpandExactRight(struct ffAli *ali, DNA *needleEnd, DNA *hayEnd)
/* Expand aligned segment to right as far as can exactly. */
{
  DNA *nEnd = ali->nEnd;
  DNA *hEnd = ali->hEnd;
  while (nEnd < needleEnd && hEnd < hayEnd) {
    if (*nEnd != *hEnd)
      break;
    nEnd += 1;
    hEnd += 1;
  }
  ali->nEnd = nEnd;
  ali->hEnd = hEnd;
  return;
}

void ffExpandExactLeft(struct ffAli *ali, DNA *needleStart, DNA *hayStart)
/* Expand aligned segment to left as far as can exactly. */
{
  DNA *nStart = ali->nStart - 1;
  DNA *hStart = ali->hStart - 1;
  while (nStart >= needleStart && hStart >= hayStart) {
    if (*nStart != *hStart)
      break;
    nStart -= 1;
    hStart -= 1;
  }
  ali->nStart = nStart + 1;
  ali->hStart = hStart + 1;
  return;
}

struct ffAli *ffMergeClose(struct ffAli *aliList, DNA *needleStart,
                           DNA *hayStart)
/* Remove overlapping areas needle in alignment. Assumes ali is sorted on
 * ascending nStart field. Also merge perfectly abutting neighbors or
 * ones that could be merged at the expense of just a few mismatches.*/
{
  struct ffAli *mid, *ali;
  int closeEnough = -3;

  if (aliList == NULL)
    return NULL;
  for (mid = aliList->right; mid != NULL; mid = mid->right) {
    for (ali = aliList; ali != mid; ali = ali->right) {
      char *nStart, *nEnd;
      int nOverlap;
      nStart = max(ali->nStart, mid->nStart);
      nEnd = min(ali->nEnd, mid->nStart);
      nOverlap = nEnd - nStart;
      /* Overlap or perfectly abut in needle, and needle/hay
       * offset the same. */
      if (nOverlap >= closeEnough) {
        int aliDiag = (ali->nStart - needleStart) - (ali->hStart - hayStart);
        int midDiag = (mid->nStart - needleStart) - (mid->hStart - hayStart);
        if (aliDiag == midDiag) {
          /* Make mid encompass both, and make ali empty. */
          mid->nStart = min(ali->nStart, mid->nStart);
          mid->nEnd = max(ali->nEnd, mid->nEnd);
          mid->hStart = min(ali->hStart, mid->hStart);
          mid->hEnd = max(ali->hEnd, mid->hEnd);
          ali->hStart = ali->hEnd = mid->hStart;
          ali->nEnd = ali->nStart = mid->nStart;
          ;
        }
      }
    }
  }
  aliList = ffRemoveEmptyAlis(aliList, TRUE);
  return aliList;
}

int ffScoreIntron(DNA a, DNA b, DNA y, DNA z, int orientation)
/* Return a better score the closer an intron is to
 * consensus. */
{
  int score = 0;
  int revScore = 0;

  if (orientation >= 0) {
    if (a == 'g' || a == 'G')
      ++score;
    if (b == 't' || b == 'T')
      ++score;
    if (y == 'a' || y == 'A')
      ++score;
    if (z == 'g' || z == 'G')
      ++score;
  }

  if (orientation <= 0) {
    if (a == 'c' || a == 'C')
      ++revScore;
    if (b == 't' || b == 'T')
      ++revScore;
    if (y == 'a' || y == 'A')
      ++revScore;
    if (z == 'c' || z == 'C')
      ++revScore;
  }

  return score > revScore ? score : revScore;
}

static int slideIntron(struct ffAli *left, struct ffAli *right, int orientation)
/* Slides space between alignments if possible to match
 * intron consensus better.  Returns how much it slid intron. */
{
  DNA *nLeft = left->nEnd;
  DNA *hLeft = left->hEnd;
  DNA *nRight = right->nStart;
  DNA *hRight = right->hStart;
  DNA nl, nr, hl, hr;
  DNA *nLeftEnd = left->nStart;
  DNA *nRightEnd = right->nEnd;
  DNA *nBestLeft = NULL;
  int bestScore = -0x7fffffff;
  int curScore;
  int offset;

  if (hRight - hLeft < 4) /* Too short to be an intron. */
    return 0;
  if (nRight - nLeft > 2) /* Too big of a gap to be an intron. */
    return 0;

  /* Slide as far to the left as possible without inserting mismatches. */
  while (nLeft > nLeftEnd) {
    nl = nLeft[-1];
    hl = hLeft[-1];
    nr = nRight[-1];
    hr = hRight[-1];
    if (!(nl == 'n' && nr == 'n')) /* N's in needle freely slide. */
    {
      if (nr != hr)
        break;
    }
    nLeft -= 1;
    hLeft -= 1;
    nRight -= 1;
    hRight -= 1;
  }
  /* Slide as far to the right as possible computing
     intron score as you go. */
  while (nRight < nRightEnd) {
    curScore =
        ffScoreIntron(hLeft[0], hLeft[1], hRight[-2], hRight[-1], orientation);
    if (curScore > bestScore) {
      bestScore = curScore;
      nBestLeft = nLeft;
    }
    nl = nLeft[0];
    hl = hLeft[0];
    if (nl != 'n' && nl != hl)
      break;
    nr = nRight[0];
    hr = hRight[0];
    nLeft += 1;
    hLeft += 1;
    nRight += 1;
    hRight += 1;
  }
  if (nBestLeft == NULL)
    return 0;
  offset = nBestLeft - left->nEnd;
  if (offset == 0)
    return offset;
  left->nEnd += offset;
  left->hEnd += offset;
  right->nStart += offset;
  right->hStart += offset;
  return offset;
}

boolean ffSlideOrientedIntrons(struct ffAli *ali, int orient)
/* Slide introns (or spaces between aligned blocks)
 * to match consensus on given strand. */
{
  struct ffAli *left = ali, *right;
  boolean slid = FALSE;
  if (left == NULL)
    return FALSE;
  while ((right = left->right) != NULL) {
    if (slideIntron(left, right, orient))
      slid = TRUE;
    left = right;
  }
  return slid;
}

boolean ffSlideIntrons(struct ffAli *ali)
/* Slide introns (or spaces between aligned blocks)
 * to match consensus.  Return TRUE if any slid. */
{
  int orient = ffIntronOrientation(ali);
  return ffSlideOrientedIntrons(ali, orient);
}

struct ffAli *ffRemoveEmptyAlis(struct ffAli *ali, boolean doFree)
/* Remove empty blocks from list. Optionally free empties too. */
{
  struct ffAli *leftAli;
  struct ffAli *startAli;
  struct ffAli *rightAli;

  if (ali == NULL)
    return NULL;

  /* Figure out left most non-empty ali. */
  while (ali->left)
    ali = ali->left;
  while (ali) {
    /* If current ali is empty, chuck it out. */
    if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart) {
      struct ffAli *empty = ali;
      ali = ali->right;
      if (doFree)
        freeMem(empty);
    } else
      break;
  }

  if (ali == NULL)
    return NULL;

  ali->left = NULL;

  /* Get rid of empty middle alis. */
  startAli = leftAli = ali;
  ali = ali->right;
  while (ali) {
    rightAli = ali->right;
    if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart) {
      leftAli->right = rightAli;
      if (rightAli != NULL)
        rightAli->left = leftAli;
      if (doFree)
        freeMem(ali);
    } else {
      leftAli = ali;
    }
    ali = rightAli;
  }
  return startAli;
}

struct ffAli *ffMergeHayOverlaps(struct ffAli *ali)
/* Remove overlaps in haystack that perfectly abut in needle.
 * These are transformed into perfectly abutting haystacks
 * that have a gap in the needle. */
{
  struct ffAli *a = NULL;
  struct ffAli *leftA = NULL;

  if (ali == NULL)
    return NULL;
  a = ali;
  for (;;) {
    int nOverlap;
    int hOverlap;
    int aSize;

    /* Advance to next ali */
    leftA = a;
    a = a->right;
    if (a == NULL)
      break;

    nOverlap = leftA->nEnd - a->nStart;
    hOverlap = leftA->hEnd - a->hStart;
    aSize = a->nEnd - a->nStart;
    if (hOverlap > 0 && hOverlap < aSize && nOverlap <= 0) {
      a->hStart += hOverlap;
      a->nStart += hOverlap;
    }
  }
  return ali;
}

struct ffAli *ffMergeNeedleAlis(struct ffAli *ali, boolean doFree)
/* Remove overlapping areas needle in alignment. Assumes ali is sorted on
 * ascending nStart field. Also merge perfectly abutting neighbors.*/
{
  struct ffAli *a = NULL;
  struct ffAli *leftA = NULL;
  struct ffAli *rightA;

  if (ali == NULL)
    return NULL;
  rightA = ali;
  for (;;) {
    /* Advance to next ali */
    leftA = a;
    a = rightA;
    if (a == NULL)
      break;
    rightA = a->right;

    /* See if can merge current alignment into left one. */
    if (leftA != NULL) {
      int overlap = leftA->nEnd - a->nStart;

      /* Deal with overlaps in needle */
      if (overlap > 0) {
        /* See if left encompasses current segment. */
        if (leftA->nStart <= a->nStart && leftA->nEnd >= a->nEnd) {
          /* Eliminate current segment. */
          leftA->right = rightA;
          if (rightA != NULL)
            rightA->left = leftA;
          if (doFree)
            freeMem(a);
          a = leftA;
        } else {
          /* Remove overlapping area from current segment, leave
           * it in left segment. */
          a->hStart += overlap;
          a->nStart += overlap;
        }
      } else if (overlap == 0 && leftA->hEnd == a->hStart) {
        /* Remove current segment from list. */
        leftA->right = rightA;
        if (rightA != NULL)
          rightA->left = leftA;
        /* Fold data from current segment into left segment */
        leftA->nEnd = a->nEnd;
        leftA->hEnd = a->hEnd;
        if (doFree)
          freeMem(a);
        a = leftA;
      }
    }
  }
  return ali;
}
